/***
This do-file creates the CPS national series by wage quartile used in our 
analysis, but excluding states that have experienced changes in their minimum 
wage (CA, MA, NY). We process the CPS data in the most analogous way possible
to our processing in the employment pipeline, so we apply the same methodology 
to define the wage quartiles.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
project figstabs, root
* r(buildrunning) is read immediately after the -project- call above. When it
* is 0 (presumably an interactive session rather than a project build -
* confirm), the interactive configuration is loaded.
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
* The globals file is registered as a build dependency and then included.
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
* NOTE(review): the local macro category is not referenced again in the visible code.
local category "Employment"

* Create directories
* -cap- swallows the error raised when the folder already exists (and any other mkdir error).
cap mkdir "${root}/data/derived/CPS"

*-------------------------------------------------------------------------------
* 1 - Get multipliers
*-------------------------------------------------------------------------------

* Open Thresholds
project, uses("${root}/data/dvc/Employment/poverty_thresholds.dta")
use "${root}/data/dvc/Employment/poverty_thresholds.dta", clear 

* Get multipliers
* For each poverty line, multiplier_X records where the threshold poverty_X
* falls inside the one-dollar window around an integer wage level.
cap drop multiplier 
foreach poverty in 100 150 250 {

	* Create variable 
	gen multiplier_`poverty' = .

	* Integer wage levels considered for this poverty line. The list is reset
	* on every pass: without the reset, the 250 pass would silently reuse the
	* list left over from the 150 pass.
	local levels ""
	if `poverty' == 150 local levels "19 20 21 22 23"
	else if `poverty' == 100 local levels "13 14 15 16"

	foreach level of local levels {
		* Distance of the threshold from the lower edge of the window around the level
		replace multiplier_`poverty' = poverty_`poverty' - `=`level'-0.5' if inrange(poverty_`poverty', `=`level'-0.5', `=`level'+0.5')
		* Shift by one when the threshold sits at or above the integer level itself
		replace multiplier_`poverty' = multiplier_`poverty' - 1 if inrange(poverty_`poverty', `=`level'', `=`level'+0.5')
	}
}

* NOTE(review): this tempfile is not used again in the visible code.
tempfile multiplier
save `multiplier'

*-------------------------------------------------------------------------------
* 2 - Import CPS and apply sample restrictions
*-------------------------------------------------------------------------------

* Open CPS data
project, uses("${root}/data/dvc/CPS/cps_00037.dta")
use "${root}/data/dvc/CPS/cps_00037.dta", clear 

* Sample restrictions
assert !mi(age, year)
keep if age >= 16
keep if year > 2019

* Exclude the states with minimum-wage changes named in the file header and in
* the output filename: CA (FIPS 6), MA (FIPS 25), NY (FIPS 36). The drop is a
* no-op if the extract was already restricted upstream. If the state variable
* is not part of the extract the exclusion cannot be applied here, so a
* warning is printed instead of stopping the build.
cap confirm variable statefip, exact
if _rc == 0 {
	drop if inlist(statefip, 6, 25, 36)
}
else {
	display as error "Warning: statefip not found in CPS extract; CA, MA and NY could not be excluded in this do-file"
}

* Create NAICS
* Each rule below reads "naics lower upper": observations whose ind code lies
* in [lower, upper] receive that two-digit naics value. The ranges do not
* overlap, and ind codes outside every range leave naics missing.
gen naics = . 
foreach rule in				///
	"11 170 290"			///
	"21 370 490"			///
	"23 770 770"			///
	"31 1070 1790"			///
	"32 1870 2590"			///
	"33 2670 3990"			///
	"42 4070 4590"			///
	"44 4670 5190"			///
	"45 5275 5790"			///
	"48 6070 6290"			///
	"49 6370 6390"			///
	"22 570 690"			///
	"51 6470 6780"			///
	"52 6870 6992"			///
	"53 7070 7190"			///
	"54 7270 7490"			///
	"55 7570 7570"			///
	"56 7580 7790"			///
	"61 7860 7890"			///
	"62 7970 8470"			///
	"71 8560 8590"			///
	"72 8660 8690"			///
	"81 8770 9290"			///
	"92 9370 9890" {
	* Split the rule into positional macros: 1 = naics, 2 = lower, 3 = upper
	tokenize `rule'
	replace naics = `1' if inrange(ind, `2', `3')
}
						
* Be consistent with PIE and CES series
* String sector codes: most sectors keep their own two-digit code, while
* manufacturing (31-33), retail (44-45) and transportation (48-49) are pooled
* into a single code each. naics 92 and missing naics keep an empty code.
gen naics_code = ""

* Sectors that map one-to-one
foreach sector of numlist 11 21 22 23 42 51/56 61 62 71 72 81 {
	replace naics_code = "`sector'" if naics == `sector'
}

* Pooled sectors
replace naics_code = "3133" if inlist(naics, 31, 32, 33)
replace naics_code = "4445" if inlist(naics, 44, 45)
replace naics_code = "4849" if inlist(naics, 48, 49)
				
* Drop some sectors according to BLS adjustment 
drop if naics == 92 		// drop those working in public sector to match CES (Total Private Employment)
drop if naics == 11 		// drop those working in agriculture, forestry, fishing, and hunting according to BLS adjustment of CPS to CES
* 9290 is a value of ind, not of naics: the mapping above folds ind 8770-9290
* into naics 81, so the test has to be made on ind for the drop to take effect.
drop if ind == 9290 		// drop workers in private households such as nannies, housekeepers, etc.
			
* Drop some classes of workers according to BLS adjustment
drop if inlist(classwkr, 0, 13, 25, 26, 27, 28, 29) 	// drop missing (0), unincorporated, self-employed (13), and all public sector employees (25-29) 
			
* Keep those with jobs 
* NOTE(review): only empstat code 10 is retained - confirm against the codebook
* that no other employed status code should be kept as well.
keep if empstat == 10
			
* Convert to super sector
* The pooled sectors are stored in naics_code without a hyphen ("3133",
* "4445", "4849"), so the comparisons below must use that spelling; with the
* hyphenated spelling those sectors never received a super sector.
gen naics_ss = ""
replace naics_ss = "10" if inlist(naics_code, "11", "21")
replace naics_ss = "20" if inlist(naics_code, "23")
replace naics_ss = "30" if inlist(naics_code, "3133")
replace naics_ss = "40" if inlist(naics_code, "42", "4445", "4849", "22")
replace naics_ss = "50" if inlist(naics_code, "51")
replace naics_ss = "55" if inlist(naics_code, "52", "53")
replace naics_ss = "60" if inlist(naics_code, "54", "55", "56")
replace naics_ss = "65" if inlist(naics_code, "61", "62")
replace naics_ss = "70" if inlist(naics_code, "71", "72")
replace naics_ss = "80" if inlist(naics_code, "81")
			
* Reformat NAICS codes  
* Kept as a safeguard: the codes assigned above contain no hyphen, so this
* leaves them unchanged.
replace naics_code = subinstr(naics_code, "-", "_", .) 
			
* Define hourly wages
cap drop wage
* Blank out values above the cutoffs below (presumably the not-in-universe /
* special codes of the CPS extract - confirm against the codebook)
replace earnweek = . if earnweek > 9999 
replace uhrswork1 = . if uhrswork1 > 996
replace hourwage = . if hourwage > 999
replace hourwage = earnweek / uhrswork1 if mi(hourwage) & paidhour == 2  		// if paid hourly, divide weekly earnings by amount of hours usually worked
* Workers with paidhour == 2 take the (possibly filled-in) hourly wage
gen wage = hourwage if paidhour == 2
* Workers with paidhour == 1 (presumably not paid by the hour - confirm) take
* weekly earnings divided by usual weekly hours
replace wage = earnweek / uhrswork1 if paidhour == 1

* Cap wages at 100 and floor them at 5. Missing wages stay missing: the cap
* excludes them explicitly, and because Stata ranks missing above every
* number, the condition wage < 5 is false for them.
replace wage = 100 if wage > 100 & !mi(wage)
replace wage = 5 if wage < 5

* Add random noise to integer wages to avoid issues associated with wage bunching
* The seed fixes the draws; they are assigned in the current observation
* order, so the result also depends on how the data are sorted at this point.
set seed 1280
gen random_noise = runiform(-0.5, 0.5)
* NOTE(review): wage_smoothed is not used again in the visible code
gen wage_smoothed = wage + random_noise if wage == round(wage, 1) & inlist(wage, 13, 14, 15, 16, 19, 20, 21, 22, 23)
* Only wages exactly equal to one of the listed integer levels are perturbed
replace wage = wage + random_noise if wage == round(wage, 1) & inlist(wage, 13, 14, 15, 16, 19, 20, 21, 22, 23)

* Date every observation on the 15th of its month; this is the key used for
* the merge with the poverty thresholds file
gen date = mdy(month, 15, year)
format date %td

*-------------------------------------------------------------------------------
* 3 - Merge poverty thresholds
*-------------------------------------------------------------------------------

* Attach the thresholds by date. Every CPS row must find a match (an unmatched
* CPS row stops the run); dates present only in the thresholds file are dropped.
project, uses("${root}/data/dvc/Employment/poverty_thresholds.dta")
merge m:1 date using "${root}/data/dvc/Employment/poverty_thresholds.dta", keep(match) assert(using match) nogenerate

*-------------------------------------------------------------------------------
* 4 - Gen quartiles
*-------------------------------------------------------------------------------

* Wage groups are delimited by the three poverty thresholds; observations
* without a wage are left without a quartile.
gen quartile = .
replace quartile = 1 if !mi(wage) & wage <= poverty_100
replace quartile = 2 if !mi(wage) & wage > poverty_100 & wage <= poverty_150
replace quartile = 3 if !mi(wage) & wage > poverty_150 & wage <= poverty_250
replace quartile = 4 if !mi(wage) & wage > poverty_250

*-------------------------------------------------------------------------------
* 5 - Smooth round numbers
*-------------------------------------------------------------------------------

* Every observation counts as one worker; the weighted sum of these ones gives
* employment in each date-by-quartile cell (observations without a quartile
* form their own cell).
gen byte count = 1
collapse (sum) employment_cps = count [pw=earnwt], by(date quartile)

* Bring the poverty thresholds back, since the collapse discarded them
project, uses("${root}/data/dvc/Employment/poverty_thresholds.dta")
merge m:1 date using "${root}/data/dvc/Employment/poverty_thresholds.dta", keep(match) nogenerate

* Identifier of each quartile-by-date cell
gegen cell = group(quartile date)

* Name under which the series is used in the next section
rename employment_cps emp_cps_smooth

*-------------------------------------------------------------------------------
* 6 - Norm
*-------------------------------------------------------------------------------

* Cells without a quartile are not part of the output
drop if mi(quartile)

* Base level: each quartile's employment in January 2020, spread to all of
* that quartile's rows
gen jan = emp_cps_smooth if inrange(date, mdy(1, 1, 2020), mdy(1, 31, 2020))
bys quartile: gegen base = mean(jan)

* Percent change relative to the January 2020 base
gen norm_emp_cps = 100 * (emp_cps_smooth / base - 1)

* Keep only the final series (this also discards the helper variables)
keep date quartile norm_emp_cps
sort quartile date

save "${root}/data/derived/CPS/CPS by wage quartile excluding NY CA MA.dta", replace
project, creates("${root}/data/derived/CPS/CPS by wage quartile excluding NY CA MA.dta")
